home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Deutsche Edition 1
/
Deutsche Edition 1.iso
/
amok
/
031-040
/
amok35
/
spellchecker
/
lexi.mod
< prev
next >
Wrap
Text File
|
1993-11-04
|
14KB
|
556 lines
(*********************************************************************
:Program. Lexi.mod
:Contents. Basic funktions for Spellchecker
:Author. Stefan Salewski
:Copyright. PD
:Language. Modula-2
:Translator. M2Amiga AMSoft V3.3d
:History. V1.0 1.Mar.1990
:Address. Stolper Weg 3, D-2160 Stade
:Imports. TurboFilesV1.1, Assembler2V1.1 (my own)
:Imports. MemSystem (Nicolas Benezan)
*********************************************************************)
IMPLEMENTATION MODULE Lexi;
FROM Assembler2 IMPORT MaxCard,MinCard,MinInt,Equal;
FROM Arts IMPORT Assert,Error;
FROM Exec IMPORT UByte;
FROM SYSTEM IMPORT ADR,CAST;
FROM MemSystem IMPORT Allocate,Deallocate,MinMemory,Hysteresis;
IMPORT Str;
FROM Strings IMPORT Copy,Delete;
FROM TurboFiles IMPORT Lookup,CloseFile,TurboRead,TurboWrite,
TurboSetPos,SetPosMode,DeleteFile,
TurboGetPos,TurboResult,ReadOnly,NewFile,FilePtr;
CONST
VeryLongWordLength=2*MaxWordLength;
LexID='Lexikon (c) 1990 by Stefan Salewski';
Save=TRUE;
Load=FALSE;
ESC= 33C;
CSI=233C;
EOL=CHAR(10);
TFRB= 1024; (* TurboFilesReadBuffer *)
TFWB=10240; (* TurboFilesWriteBuffer *)
TYPE
FileName=ARRAY[0..80] OF CHAR;
LexEntry=RECORD
word:Word;
count:UByte;
END;
VAR
lexID:ARRAY[0..SIZE(LexID)] OF CHAR;
lexPtr:POINTER TO ARRAY [1..MAX(CARDINAL)] OF LexEntry;
wordsInLex:CARDINAL;
LexSize:CARDINAL; (* This is a pseudo-Constant, so I use upper Case *)
PROCEDURE Letter(c:CHAR):BOOLEAN;
BEGIN
IF ((c>='a') AND (c<='z')) OR ((c>='A') AND (c<='Z')) OR (c="'") THEN
RETURN TRUE
ELSIF (c>=CHAR(192)) THEN
RETURN (c#CHAR(215)) AND (c#CHAR(247))
ELSE
RETURN FALSE
END
END Letter;
PROCEDURE Cap(c:CHAR):CHAR;
BEGIN
IF ((c>='a') AND (c<='z')) OR ((c>=CHAR(224)) AND (c#CHAR(255)) AND
(c#CHAR(247))) THEN
RETURN CAST(CHAR,CAST(UByte,c)-32)
ELSE
RETURN c
END
END Cap;
PROCEDURE IsCap(c:CHAR):BOOLEAN;
BEGIN
RETURN((c>='A') AND (c<='Z')) OR ((c>=CHAR(192)) AND (c<=CHAR(222)) AND
(c#CHAR(215)))
END IsCap;
PROCEDURE AllocLex;
VAR
fileName:FileName;
BEGIN
fileName:=TmpLexName;
IF SaveLex(fileName) THEN
IF LoadLex(fileName) THEN
IF DeleteFile(fileName) THEN END
ELSE
Error(ADR('OutOfMem! Lexikon is saved as:'),
ADR(TmpLexName));
END;
ELSE
Error(ADR('To Insert more Words I need:'),
ADR(TmpLexName));
END;
END AllocLex;
PROCEDURE InsertWord(VAR w:Word;pos:CARDINAL;count:UByte);
VAR
i:CARDINAL;
BEGIN
IF (pos=0) OR (Str.Length(w)<MinWordLength) THEN RETURN END;
IF wordsInLex=LexSize THEN
AllocLex
END;
FOR i:=wordsInLex TO pos BY -1 DO
lexPtr^[i+1]:=lexPtr^[i]
END;
lexPtr^[pos].count:=count;
lexPtr^[pos].word:=w;
INC(wordsInLex)
END InsertWord;
PROCEDURE InitLex;
BEGIN
wordsInLex:=0;
END InitLex;
PROCEDURE Init;
BEGIN
MinMemory:=1024*100;
Hysteresis:=1024*50;
Allocate(lexPtr,MinWords*SIZE(LexEntry));
Assert(lexPtr#NIL,ADR('Not enough Memory for Lexikon!'));
LexSize:=MinWords;
wordsInLex:=0;
END Init;
PROCEDURE Del(pos:CARDINAL);
VAR
i:CARDINAL;
BEGIN
FOR i:=pos TO wordsInLex-1 DO
lexPtr^[i]:=lexPtr^[i+1];
END;
DEC(wordsInLex);
END Del;
PROCEDURE WordsInLex():CARDINAL;
BEGIN
RETURN wordsInLex
END WordsInLex;
PROCEDURE BinSearch(VAR s:Word;VAR pos:CARDINAL;inc:UByte):BOOLEAN;
VAR
l,m,r,i:CARDINAL;
BEGIN
l:=1;
r:=wordsInLex+1;
WHILE l<r DO
m:=(l+r) DIV 2;
i:=0;
WHILE (lexPtr^[m].word[i]=s[i]) AND (s[i]#0C) DO
INC(i)
END;
IF (lexPtr^[m].word[i]<s[i]) THEN
l:=m+1
ELSE
r:=m
END;
END;
pos:=r;
IF r<=wordsInLex THEN
i:=0;
WHILE (lexPtr^[r].word[i]=s[i]) AND (s[i]#0C) DO
INC(i)
END;
IF (s[i]>lexPtr^[r].word[i]) THEN
INC(pos);
RETURN FALSE
ELSIF (s[i]=lexPtr^[r].word[i]) THEN
IF lexPtr^[pos].count<=MAX(UByte)-inc THEN
INC(lexPtr^[pos].count,inc);
END;
RETURN TRUE
END;
END;
RETURN FALSE
END BinSearch;
PROCEDURE DeleteWord(VAR w:Word):BOOLEAN;
VAR
pos,i:CARDINAL;
BEGIN
IF BinSearch(w,pos,0) THEN
Del(pos);
RETURN TRUE
ELSE
RETURN FALSE
END;
END DeleteWord;
PROCEDURE SearchString(VAR w:ARRAY OF CHAR;VAR pos:CARDINAL;
inc:UByte):BOOLEAN;
VAR
right,left:Word;
undoBuf:ARRAY[0..2*MaxWordLength] OF CHAR;
len,h,startLen:INTEGER;
p:CARDINAL;
up,bufferFree:BOOLEAN;
BEGIN
bufferFree:=TRUE;
startLen:=Str.Length(w);
len:=startLen;
up:=IsCap(w[0]);
h:=MinInt(len,MaxWordLength);
Copy(left,w,0,h);
Copy(right,w,len-h,h);
WHILE h>=MinFracSizeS DO
IF up THEN right[0]:=Cap(right[0]) END;
IF BinSearch(left,p,inc) THEN
DEC(len,h);
IF len=0 THEN RETURN TRUE END;
IF (len<MinFracSizeI) AND bufferFree THEN
Str.Copy(undoBuf,w);
bufferFree:=FALSE;
END;
Delete(w,0,h);
IF up THEN w[0]:=Cap(w[0]) END;
h:=MinInt(len,MaxWordLength);
Copy(left,w,0,h);
Copy(right,w,len-h,h);
ELSIF BinSearch(right,p,inc) THEN
DEC(len,h);
IF len=0 THEN RETURN TRUE END;
IF (len<MinFracSizeI) AND bufferFree THEN
Str.Copy(undoBuf,w);
bufferFree:=FALSE;
END;
w[len]:=0C;
h:=MinInt(len,MaxWordLength);
Copy(left,w,0,h);
Copy(right,w,len-h,h);
ELSE
DEC(h);
left[h]:=0C;
Delete(right,0,1);
END;
END;
(*IF len=0 THEN
RETURN TRUE
ELS*)IF len>MaxWordLength THEN
pos:=0;
RETURN FALSE
ELSIF (len>=MinFracSizeI) OR (len=startLen) THEN
Str.Copy(left,w);
RETURN BinSearch(left,pos,inc)
ELSE
IF NOT bufferFree AND (Str.Length(undoBuf)<=MaxWordLength) THEN
Str.Copy(w,undoBuf);
Str.Copy(left,undoBuf);
RETURN BinSearch(left,pos,inc)
ELSE
pos:=0;
RETURN FALSE
END;
END;
END SearchString;
PROCEDURE ReadWord(source,echo:FilePtr;
VAR str:ARRAY OF CHAR;VAR info:InfoSet;
minLength,maxLength:CARDINAL);
VAR
actual,startPos:LONGINT;
get,strLength,pos:CARDINAL;
c:CHAR;
BEGIN
IF TurboGetPos(source)=0 THEN
info:=InfoSet{cap}
ELSE
info:=InfoSet{}
END;
strLength:=MinCard(HIGH(str),maxLength);
IF strLength=0 THEN str[0]:=0C; RETURN END;
REPEAT
LOOP
(* Ignore Control-Sequences... concept stolen from Muchmore V1.5 *)
TurboRead(source,ADR(c),1,actual);
IF echo#NIL THEN
TurboWrite(echo,ADR(c),actual)
END;
IF (c#ESC) AND (c#CSI) THEN EXIT END;
REPEAT
TurboRead(source,ADR(c),1,actual);
IF echo#NIL THEN
TurboWrite(echo,ADR(c),actual)
END;
c:=CAP(c);
UNTIL ((c>="?") AND (c<="Z")) OR (source^.res#done);
END;
IF (c='.') OR (c='?') OR (c='!') OR (c=':') THEN
INCL(info,cap)
END;
UNTIL Letter(c) OR (source^.res#done);
str[0]:=c;
startPos:=TurboGetPos(source);
pos:=0;
WHILE Letter(str[pos]) AND (source^.res=done) DO
IF pos<strLength THEN
INC(pos)
END;
TurboRead(source,ADR(str[pos]),1,actual);
IF echo#NIL THEN
TurboWrite(echo,ADR(str[pos]),actual)
END;
END;
IF (str[pos]='-') THEN
INCL(info,hyphen)
ELSIF (str[pos]=CHAR(4)) THEN
INCL(info,bHyphen)
END;
get:=TurboGetPos(source)-startPos;
IF TurboSetPos(source,-1,current) THEN END;
IF (echo#NIL) AND TurboSetPos(echo,-1,current) THEN END;
IF (actual#0) AND (get<=strLength) AND (get>=minLength) THEN
str[pos]:=0C;
ELSE
str[0]:=0C
END;
END ReadWord;
PROCEDURE ExpandLex(VAR textName:ARRAY OF CHAR):BOOLEAN;
VAR
info:InfoSet;
runs:CARDINAL;
fPtr:FilePtr;
cap:BOOLEAN;
w:ARRAY[0..VeryLongWordLength] OF CHAR;
word:Word;
count:UByte;
pos:CARDINAL;
nil:ARRAY[0..4] OF CHAR;
BEGIN
nil:='NIL:';
IF Lookup(fPtr,textName,TFRB,ReadOnly)=done THEN
runs:=MinCard(MinFracSizeI,MinFracSizeS)-1;
count:=0;
REPEAT
IF runs>=MaxWordLength THEN
runs:=VeryLongWordLength;
count:=1
ELSE
INC(runs,MinFracSizeS)
END;
IF fPtr^.res=endOfFile THEN fPtr^.res:=done END;
IF TurboSetPos(fPtr,0,beginning) THEN END;
REPEAT
ReadWord(fPtr,NIL,w,info,MinWordLength,runs);
IF hyphen IN info THEN
ReadWord(fPtr,NIL,w,info,MinWordLength,runs);
ReadWord(fPtr,NIL,w,info,MinWordLength,runs);
ELSIF bHyphen IN info THEN
ReadWord(fPtr,NIL,w,info,MinWordLength,runs);
ReadWord(fPtr,NIL,w,info,MinWordLength,runs);
ReadWord(fPtr,NIL,w,info,MinWordLength,runs);
END;
IF (info=InfoSet{}) AND (w[0]#0C) AND NOT IsCap(w[1]) THEN
IF NOT SearchString(w,pos,count) AND (pos#0) THEN
Str.Copy(word,w);
InsertWord(word,pos,count);
END;
END;
UNTIL fPtr^.res#done;
UNTIL runs=VeryLongWordLength;
CloseFile(fPtr);
IF MinCount()=0 THEN
count:=CleanLex(nil)
END;
RETURN TRUE
ELSE
RETURN FALSE
END;
END ExpandLex;
PROCEDURE SaveLex(VAR lexName:ARRAY OF CHAR):BOOLEAN;
VAR
i,entries:CARDINAL;
fPtr:FilePtr;
res:TurboResult;
BEGIN
entries:=wordsInLex;
IF Lookup(fPtr,lexName,TFWB,NewFile)=done THEN
TurboWrite(fPtr,ADR(LexID),SIZE(LexID));
TurboWrite(fPtr,ADR(entries),SIZE(entries));
FOR i:=1 TO entries DO
TurboWrite(fPtr,ADR(lexPtr^[i].word),Str.Length(lexPtr^[i].word)+1);
TurboWrite(fPtr,ADR(lexPtr^[i].count),SIZE(lexPtr^[i].count));
END;
res:=fPtr^.res;
CloseFile(fPtr);
RETURN res=done
ELSE
RETURN FALSE
END;
END SaveLex;
PROCEDURE LoadLex(VAR lexName:ARRAY OF CHAR):BOOLEAN;
VAR
fPtr:FilePtr;
i,entries:CARDINAL;
j:INTEGER;
actual:LONGINT;
ok:BOOLEAN;
BEGIN
IF Lookup(fPtr,lexName,TFRB,ReadOnly)=done THEN
TurboRead(fPtr,ADR(lexID),SIZE(LexID),actual);
ok:=Equal(ADR(lexID),ADR(LexID),SIZE(LexID));
IF ok THEN
TurboRead(fPtr,ADR(entries),SIZE(entries),actual);
IF entries+MoreWords>LexSize THEN
Deallocate(lexPtr);
Allocate(lexPtr,LONGINT(entries+MoreWords)*SIZE(LexEntry));
Assert(lexPtr#NIL,ADR('Not enough Memory for Lexikon'));
LexSize:=entries+MoreWords
END;
i:=1;
WHILE (fPtr^.res=done) AND (i<=entries) DO
j:=-1;
REPEAT
INC(j);
TurboRead(fPtr,ADR(lexPtr^[i].word[j]),1,actual);
UNTIL (lexPtr^[i].word[j]=0C) OR (j=MaxWordLength);
TurboRead(fPtr,ADR(lexPtr^[i].count),SIZE(lexPtr^[i].count),actual);
INC(i);
END;
IF fPtr^.res=done THEN
wordsInLex:=entries;
ELSE
wordsInLex:=0;
ok:=FALSE
END;
END;
CloseFile(fPtr);
RETURN ok
ELSE
RETURN FALSE
END;
END LoadLex;
PROCEDURE MinCount():UByte;
VAR
i:CARDINAL;
min:UByte;
BEGIN
min:=MAX(UByte);
FOR i:=1 TO wordsInLex DO
IF lexPtr^[i].count<min THEN
min:=lexPtr^[i].count
END;
END;
IF wordsInLex=0 THEN
RETURN 0
ELSE
RETURN min
END;
END MinCount;
PROCEDURE MaxCount():UByte;
VAR
i:CARDINAL;
max:UByte;
BEGIN
max:=0;
FOR i:=1 TO wordsInLex DO
IF lexPtr^[i].count>max THEN
max:=lexPtr^[i].count
END;
END;
RETURN max
END MaxCount;
(*PROCEDURE CleanLex(VAR cleanName:ARRAY OF CHAR):UByte;
VAR
fPtr:FilePtr;
i:CARDINAL;
min:UByte;
eol:CHAR;
BEGIN
eol:=EOL;
min:=MinCount();
IF Lookup(fPtr,cleanName,TFWB,NewFile)=done THEN
i:=1;
WHILE i<=wordsInLex DO
IF lexPtr^[i].count=min THEN
TurboWrite(fPtr,ADR(lexPtr^[i].word),Str.Length(lexPtr^[i].word));
TurboWrite(fPtr,ADR(eol),1);
Del(i);
ELSE
INC(i);
END;
END;
CloseFile(fPtr);
RETURN min
ELSE
RETURN 0
END;
END CleanLex;*)
PROCEDURE CleanLex(VAR cleanName:ARRAY OF CHAR):UByte;
VAR
fPtr:FilePtr;
source,dest:CARDINAL;
min:UByte;
eol:CHAR;
BEGIN
eol:=EOL;
min:=MinCount();
IF Lookup(fPtr,cleanName,TFWB,NewFile)=done THEN
source:=1;
dest:=1;
LOOP
WHILE (source<=wordsInLex) AND (lexPtr^[source].count=min) DO
TurboWrite(fPtr,ADR(lexPtr^[source].word),
Str.Length(lexPtr^[source].word));
TurboWrite(fPtr,ADR(eol),1);
INC(source);
END;
IF source<=wordsInLex THEN
lexPtr^[dest]:=lexPtr^[source];
INC(dest);
INC(source);
ELSE
EXIT
END;
END;
wordsInLex:=dest-1;
CloseFile(fPtr);
RETURN min
ELSE
RETURN 0
END;
END CleanLex;
PROCEDURE ExportLex(VAR exportName:ARRAY OF CHAR):BOOLEAN;
VAR
fPtr:FilePtr;
i:CARDINAL;
res:TurboResult;
eol:CHAR;
BEGIN
eol:=EOL;
IF Lookup(fPtr,exportName,TFWB,NewFile)=done THEN
FOR i:=1 TO wordsInLex DO
TurboWrite(fPtr,ADR(lexPtr^[i].word),Str.Length(lexPtr^[i].word));
TurboWrite(fPtr,ADR(eol),1);
END;
res:=fPtr^.res;
CloseFile(fPtr);
RETURN res=done
ELSE
RETURN FALSE
END;
END ExportLex;
BEGIN
Init;
END Lexi.